##### REPLICATION FILE 
##### Validating the feeling thermometer as a measure of partisan affect in multi-party systems 
##### Noam Gidron, Lior Sheffer, Guy Mor 
##### Electoral Studies 
#######################################################################################################
##### This is the replication masterfile producing the analyses in the article. The primary analyses 
##### utilize a draft version of the The Israel Polarization Panel Dataset, 2019–2021 (IPP). Some 
##### additional analyses utilize the published version of the IPP, available here:
##### https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/A6AA4X&version=1.1
#######################################################################################################
##### Data for the textual analyses is loaded here after pre-processing and manual fixes of the 
##### lemmatization of the Hebrew text and machine translation of tokens into English.
##### For the code used for text processing, see text_preprocessing.py 
#######################################################################################################


library(readr)
library(dplyr)
library(ggplot2)
library(magrittr)
library(kableExtra)
library(stargazer)
library(gridExtra)
library(lme4)
library(parameters)
library(grid)
library(tidyr)
library(forcats)
library(dotwhisker)
library(texreg)
library(broom.mixed)
library(sjPlot)

#### TEXT ANALYSIS PLOTS

### read processed files after manual fixes to lemmatization and English translation. See .py file for pre-processing information.

likud_stopwords_rem <- read_csv("likud_elasticnet_predictivewords_w_sentiment_stopwords_removed.csv")
kahollavan_stopwords_rem <- read_csv("kahol_lavan_elasticnet_predictivewords_w_sentiment_stopwords_removed.csv")
avoda_stopwords_rem <- read_csv("avoda_elasticnet_predictivewords_w_sentiment_stopwords_removed.csv")
aguda_stopwords_rem <-  read_csv("aguda_elasticnet_predictivewords_w_sentiment_stopwords_removed.csv")
jointlist_stopwords_rem <- read_csv("joint_list_elasticnet_predictivewords_w_sentiment_stopwords_removed.csv")

likud_pos <- likud_stopwords_rem %>% top_n(20, weight) %>% mutate(english_y = paste0(english_y, " (", round(sentiwords_y, 2), ")")) %>% pull(english_y)
likud_neg <- likud_stopwords_rem %>% top_n(20, -weight) %>% mutate(english_y = paste0(english_y, " (", round(sentiwords_y, 2), ")")) %>% pull(english_y)

kh_pos <- kahollavan_stopwords_rem %>% top_n(20, weight) %>% mutate(english_y = paste0(english_y, " (", round(sentiwords_y, 2), ")")) %>% pull(english_y)
kh_neg <- kahollavan_stopwords_rem %>% top_n(20, -weight) %>% mutate(english_y = paste0(english_y, " (", round(sentiwords_y, 2), ")")) %>% pull(english_y)

joint_pos <- jointlist_stopwords_rem %>% top_n(20, weight)  %>% mutate(english_y = paste0(english_y, " (", round(sentiwords_y, 2), ")")) %>% pull(english_y)
joint_neg <- jointlist_stopwords_rem %>% top_n(20, -weight)  %>% mutate(english_y = paste0(english_y, " (", round(sentiwords_y, 2), ")")) %>% pull(english_y)

aguda_pos <- aguda_stopwords_rem %>% top_n(20, weight)  %>% mutate(english_y = paste0(english_y, " (", round(sentiwords_y, 2), ")")) %>% pull(english_y)
aguda_neg <- aguda_stopwords_rem %>% top_n(20, -weight)  %>% mutate(english_y = paste0(english_y, " (", round(sentiwords_y, 2), ")")) %>% pull(english_y)

avoda_pos <- avoda_stopwords_rem %>% top_n(20, weight)  %>% mutate(english_y = paste0(english_y, " (", round(sentiwords_y, 2), ")")) %>% pull(english_y)
avoda_neg <- avoda_stopwords_rem %>% top_n(20, -weight)  %>% mutate(english_y = paste0(english_y, " (", round(sentiwords_y, 2), ")")) %>% pull(english_y)

# generate Table 1
data.frame(likud_pos, likud_neg,  kh_pos, kh_neg, joint_pos,
           joint_neg, aguda_pos, aguda_neg,avoda_pos, avoda_neg) %>% kable(format = "latex", booktabs=T)

## read output files prior to stopword removal - for analyses in Table 2 and Figure 1

likud <- read_csv("likud_elasticnet_predictivewords_w_sentiment.csv")
kahollavan<- read_csv("kahol_lavan_elasticnet_predictivewords_w_sentiment.csv")
avoda <- read_csv("avoda_elasticnet_predictivewords_w_sentiment.csv")
aguda <-  read_csv("aguda_elasticnet_predictivewords_w_sentiment.csv")
jointlist <- read_csv("joint_list_elasticnet_predictivewords_w_sentiment.csv")

# generate Table 2


m1 <- lm(data=likud, weight ~ sentiwords_y)
m2 <- lm(data=kahollavan, weight ~ sentiwords_y)
m3 <- lm(data=jointlist, weight ~ sentiwords_y)
m4 <- lm(data=aguda, weight ~ sentiwords_y)
m5 <- lm(data=avoda, weight ~ sentiwords_y)


stargazer(m1, m2, m3, m4, m5, type="latex", df = F, align=T, header=F,
          column.labels = c("Likud", "Kahol Lavan", "Joint List", "Aguda", "Avoda"),
          dep.var.labels=c("Predictive weight"),
          covariate.labels = c("Sentiment"), single.row = F, 
          title="Word predictive weight by sentiment polarity",
          table.placement = "H", out.header = F, report=('vcs*'))


##### generate Figure 1
## generate top panel

# plot_label is used to suppress label information for some observations for better readability


sentiwords_q1 <- as.numeric(quantile(likud$sentiwords_y, 0.01))
sentiwords_q2 <- quantile(likud$sentiwords_y, 0.99) %>% as.numeric()

weight_q1 <- quantile(likud$weight, 0.1) %>% as.numeric()
weight_q2 <- quantile(likud$weight, 0.9) %>% as.numeric()

likud$plot_label <- T
likud$plot_label[likud$sentiwords_y == 0] <-F

likud <- likud %>% mutate(plot_label = ifelse((weight > weight_q1) & (weight < weight_q2) & (sentiwords_y > 
                                                                                               sentiwords_q1) & (sentiwords_y < sentiwords_q2),
                                              F, plot_label))



likud_plot <- ggplot(likud, aes(weight, sentiwords_y, color=sentiwords_y)) + 
  geom_text(aes(label = ifelse(plot_label, english_y, "")), size = 5) +
  geom_smooth(method="lm", color = "saddlebrown") + theme_dark() + scale_color_distiller(palette = "RdYlGn", direction = 1) +
  geom_point(data = likud %>% filter(!plot_label), alpha=0.2) + xlab("Predictive weight") + 
  ylab("Sentiment score") +
  labs(color = "Sentiment \npolarity") + ggtitle("Likud") + 
  theme(
    text = element_text(color = "grey20"),
    plot.caption = element_text(hjust = 0, face = "italic")) +
  theme(plot.title = element_text(hjust = 0.5, family = "Decima WE"))



## generate bottom panel


sentiwords_q1 <- as.numeric(quantile(kahollavan$sentiwords_y, 0.01))
sentiwords_q2 <- quantile(kahollavan$sentiwords_y, 0.99) %>% as.numeric()

weight_q1 <- quantile(kahollavan$weight, 0.05) %>% as.numeric()
weight_q2 <- quantile(kahollavan$weight, 0.95) %>% as.numeric()

kahollavan$plot_label <- T
kahollavan$plot_label[kahollavan$sentiwords_y == 0] <-F

kahollavan <- kahollavan %>% mutate(plot_label = ifelse((weight > weight_q1) & (weight < weight_q2) & (sentiwords_y > 
                                                                                                         sentiwords_q1) & (sentiwords_y < sentiwords_q2),
                                                        F, plot_label))


kh_plot <- ggplot(kahollavan, aes(weight, sentiwords_y, color=sentiwords_y)) + 
  geom_text(aes(label = ifelse(plot_label, english_y, "")), size = 4.5) +
  geom_smooth(method="lm", color = "saddlebrown") + theme_dark() + scale_color_distiller(palette = "RdYlGn", direction = 1) +
  geom_point(data = likud %>% filter(!plot_label), alpha=0.2) + xlab("Predictive weight") + 
  ylab("Sentiment score") +
  labs(color = "Sentiment \npolarity") + ggtitle("Kahol Lavan") + 
  theme(
    text = element_text(color = "grey20"),
    plot.caption = element_text(hjust = 0, face = "italic")) +
  theme(plot.title = element_text(hjust = 0.5, family = "Decima WE"))



#extract legend
#https://github.com/hadley/ggplot2/wiki/Share-a-legend-between-two-ggplot2-graphs
g_legend<-function(a.gplot){
  tmp <- ggplot_gtable(ggplot_build(a.gplot))
  leg <- which(sapply(tmp$grobs, function(x) x$name) == "guide-box")
  legend <- tmp$grobs[[leg]]
  return(legend)}

mylegend<-g_legend(likud_plot)

# generate combined graph for Figure 1
combined <- grid.arrange(arrangeGrob(likud_plot + theme(legend.position="none"),
                                     kh_plot + theme(legend.position="none"),
                                     nrow=2))

################################

### load panel data file - IPP draft

df <- read_csv("all_waves.csv")
df %<>% distinct(uniqueid, .keep_all = T)

# discard unnecessary variables
df %<>% select(uniqueid, left_right_1, sex, rel, edu, osek, ses, migzar, income, vote2015, w3first_vc_str, w5first_vc_str,
               matches("^w\\d(th|sd|dg)"))

# convert to long format
df_long <- pivot_longer(df, cols = matches("^w\\d(th|sd|dg)"), names_to = c("wave", "var", "party"),
                        names_pattern = "w(\\d)(.*)_p?(.*)")


# load party number-to-name link file
link <- read_csv("number2party.csv")

link$wave %<>% as.character()
link$num %<>% as.character()

# join long dataframe with link file
df_long <- left_join(df_long, link, by=c("wave"="wave", "var"="var", "party"="num"))

### create data in dyadic format

df_sd <- df_long %>% filter(var == "sd")
df_th <- df_long %>% filter(var == "th")
df_dg <- df_long %>% filter(var == "dg")

df_th %<>% rename(th = value) %>% select(-var, -party)
df_sd %<>% rename(sd = value) %>% select(-var, -party)
df_dg %<>% rename(dg = value) %>% select(-var, -party)


df_dyadic <- left_join(df_th, df_sd)
df_dyadic <- left_join(df_dyadic, df_dg)

df_dyadic <- df_dyadic %>% distinct()

# reverse scales for thermometer scores and dictator game scores
rev_scale <- function(x){
  (max(x, na.rm = T)) - x
}

df_dyadic$th %<>% rev_scale()
df_dyadic$dg %<>% rev_scale()

# create dyad indentifiers
df_dyadic %<>% mutate(dyadid = paste0(uniqueid,"_",partyname))

# parties to keep - remove parties with no dictator game data
parties <- df_dyadic %>% group_by(partyname) %>% summarize(m = mean(is.na(dg))) %>% filter(m != 1) %>% pull(partyname)

# add demeaned variables to the data
d <- cbind(
  df_dyadic,
  demean(df_dyadic, select = c("th", "sd", "dg"), group = "dyadid") # from package "parameters"
)

# create dataframe for social distance regression and dictator game regression, removing missing data for each
d_sd <- d %>% filter(partyname %in% parties) %>% 
  filter(!is.na(sd), !is.na(th_within), !is.na(th_between))
d_dg <- d %>% filter(partyname %in% parties) %>% 
  filter(!is.na(dg), !is.na(th_within), !is.na(th_between))

#### generate Figure 3


# plotting aid
grid_arrange_shared_legend <- function(...) {
  plots <- list(...)
  g <- ggplotGrob(plots[[1]] + theme(legend.position="bottom"))$grobs
  legend <- g[[which(sapply(g, function(x) x$name) == "guide-box")]]
  lheight <- sum(legend$height)
  grid.arrange(
    do.call(arrangeGrob, lapply(plots, function(x)
      x + theme(legend.position="none"))),
    legend,
    ncol = 1,
    heights = unit.c(unit(1, "npc") - lheight, lheight))
}


## Dot plots - figure 3

d %>% group_by(partyname, wave) %>% summarize(sd = mean(sd, na.rm=T), th = mean(th, na.rm=T)) %>% filter(!is.na(sd)) %>% ggplot(aes(th, sd)) + geom_point(aes(color=partyname), size=7) + geom_text(aes(label = wave)) + scale_color_brewer(palette="Set2") + theme_light() + labs(title = "Social distance", x = "Mean thermometer score", y = "Mean social distance score", color = "Evaluated party") +
  theme(text = element_text(family = "Decima WE", color = "grey20", size = 15), plot.title = element_text(hjust = 0.5)) -> dotssdp

d %>% group_by(partyname, wave) %>% summarize(dg = mean(dg, na.rm=T), th = mean(th, na.rm=T)) %>% filter(!is.na(dg)) %>% ggplot(aes(th, dg)) + geom_point(aes(color=partyname), size=7) + geom_text(aes(label = wave)) + scale_color_brewer(palette="Set2") + theme_light() + labs(title = "Dictator game", x = "Mean thermometer score", y = "Mean dictator game score", color = "Evaluated party") +
  theme(text = element_text(family = "Decima WE", color = "grey20", size = 15), plot.title = element_text(hjust = 0.5)) -> dotsdgp

grid_arrange_shared_legend(dotssdp, dotsdgp)

#### Generate Figure 4

### Correlation plots

df_dyadic %>% group_by(wave, partyname) %>% 
  summarize(cor = cor(th, sd, use="pairwise")) %>%
  filter(!is.na(cor)) -> corthsd

corthsd$partyname %<>% fct_relevel(c("Likud", "Kahol Lavan", "Yahadut Hatorah", "Balad-Raam",
                                     "Hareshima Hameshutefet", "Meretz", "Hamahane Hademokrati", "Haavoda-Gesher-Meretz"))

corthsd$wave %<>% as.numeric()

corthsd %>% 
  ggplot(aes(wave, cor)) + 
  geom_point(aes(group=partyname, color=partyname), size=4) +
  geom_line(aes(group=partyname, color=partyname), size = 2) +
  geom_line(data = corthsd %>% mutate(partyname = fct_collapse(partyname, "Meretz" = c("Meretz", "Hamahane Hademokrati", "Haavoda-Gesher-Meretz"),
                                                               "Balad-Raam" = c("Balad-Raam", "Hareshima Hameshutefet"))),
            aes(group=partyname, color=partyname), linetype="dashed") +
  scale_x_continuous(labels = c("March 19", "April 19 #1", "April 19 #2", "May 19", "Sep 19", "March 20", "May 20"), breaks = 1:max(corthsd$wave)) +
  #ylim(0, 0.55) +
  geom_vline(xintercept = 2.5) +
  geom_vline(xintercept = 4.5) +
  geom_vline(xintercept = 5.5) + 
  annotate("text", x = 2.35, y = 0.2, angle = 90, label = "April Elections", vjust = 1.2, parse = FALSE) +
  annotate("text", x = 4.35, y = 0.2, angle = 90, label = "September Elections", vjust = 1.2, parse = FALSE) +
  annotate("text", x = 5.35, y = 0.2, angle = 90, label = "March Elections", vjust = 1.2, parse = FALSE) +
  theme_light() +
  scale_color_brewer(palette = "Set2") +
  labs(title = "Social distance and feeling thermometer",
       colour = "Evaluated party",
       x = "",
       y = "Correlation") +
  theme(
    text = element_text(family = "Decima WE", color = "grey20", size = 15),
    plot.title = element_text(hjust = 0.5)) +
  scale_y_continuous(breaks = seq(0, 0.6, by = 0.10), limits = c(0, 0.6)) -> sdcorp

df_dyadic %>% group_by(wave, partyname) %>% 
  summarize(cor = cor(th, dg, use="pairwise")) %>%
  filter(!is.na(cor)) -> corthdg

corthdg$partyname %<>% fct_relevel(c("Likud", "Kahol Lavan", "Meretz", "Yahadut Hatorah", "Balad-Raam",
                                     "Hareshima Hameshutefet", "Hamahane Hademokrati"))

corthdg$wave %<>% as.numeric()

corthdg %>% 
  ggplot(aes(wave, cor)) + 
  geom_point(aes(group=partyname, color=partyname), size=4) +
  geom_line(aes(group=partyname, color=partyname), size = 2) +
  geom_line(data = corthdg %>% mutate(partyname = fct_collapse(partyname, "Meretz" = c("Meretz", "Hamahane Hademokrati", "Haavoda-Gesher-Meretz"),
                                                               "Balad-Raam" = c("Balad-Raam", "Hareshima Hameshutefet"))),
            aes(group=partyname, color=partyname), linetype="dashed") +
  scale_x_continuous(labels = c("March 19", "April 19 #1", "April 19 #2", "May 19", "Sep 19", "March 20", "May 20"), breaks = 1:max(corthsd$wave)) +
  #ylim(0, 0.55) +
  geom_vline(xintercept = 2.5) +
  geom_vline(xintercept = 4.5) +
  geom_vline(xintercept = 5.5) + 
  annotate("text", x = 2.35, y = 0.4, angle = 90, label = "April Elections", vjust = 1.2, parse = FALSE) +
  annotate("text", x = 4.35, y = 0.4, angle = 90, label = "September Elections", vjust = 1.2, parse = FALSE) +
  annotate("text", x = 5.35, y = 0.4, angle = 90, label = "March Elections", vjust = 1.2, parse = FALSE) +
  theme_light() +
  scale_color_brewer(palette = "Set2") +
  labs(title = "Dictator game and feeling thermometer",
       colour = "Evaluated party",
       x = "",
       y = "Correlation") +
  theme(
    text = element_text(family = "Decima WE", color = "grey20", size = 15),
    plot.title = element_text(hjust = 0.5)) +
  scale_y_continuous(breaks = seq(0, 0.6, by = 0.10), limits = c(0, 0.6)) -> dgcorp


grid_arrange_shared_legend(sdcorp, dgcorp)

### regression analyses

# without wave fixed effect
mod1_sd <- lmer(sd ~ th_between + th_within + (1 + th_within | uniqueid:partyname) + partyname,  data=d_sd, control = lmerControl(optimizer="Nelder_Mead"))
mod1_dg <- lmer(dg ~ th_between + th_within + (1 + th_within  | uniqueid:partyname) + partyname,  data=d_dg, control = lmerControl(optimizer="Nelder_Mead"))

# with wave fixed effects
mod2_sd <- lmer(sd ~ th_between + th_within + (1 + th_within  | uniqueid:partyname) + partyname + as.factor(wave),  data=d_sd, control = lmerControl(optimizer="Nelder_Mead"))
mod2_dg <- lmer(dg ~ th_between + th_within + (1 + th_within  | uniqueid:partyname) + partyname + as.factor(wave),  data=d_dg, control = lmerControl(optimizer="Nelder_Mead"))


#### generate Table 3
stargazer(mod1_sd, mod2_sd, mod1_dg, mod2_dg, df = F, single.row = F, report=('vcs*'), align=T, header=F, dep.var.labels=c("Social Distance", "Dictator Game"), title="Random Effects Within-Between Models", table.placement = "H", out.header = F, covariate.labels = c("Thermometer \\textsubscript{BETWEEN}", "Thermometer \\textsubscript{WITHIN}"),
          omit = c("wave", "partyname", "Constant"), add.lines = list(c("Controls", "\\faClose", "\\faCheck", "\\faClose", "\\faCheck")))

### generate Table A.4 in SI

stargazer(mod1_sd, mod2_sd, mod1_dg, mod2_dg, covariate.labels = c("Thermometer \\textsubscript{BETWEEN}", "Thermometer \\textsubscript{WITHIN}", 
                                                                   "Party [ref: Balad-Raam] \\\\ \\-\\hspace{0.5cm}Haavoda-Gesher-Meretz", "\\-\\hspace{0.5cm}Hamahane Hademokrati", "\\-\\hspace{0.5cm}Hareshima Hameshutefet", "\\-\\hspace{0.5cm}Kahol Lavan", "\\-\\hspace{0.5cm}Likud", "\\-\\hspace{0.5cm}Meretz", "\\-\\hspace{0.5cm}Yahadut Hatorah", 
                                                                   "Wave [ref: Wave 1] \\\\ \\-\\hspace{0.5cm}Wave 2", "\\-\\hspace{0.5cm}Wave 3", "\\-\\hspace{0.5cm}Wave 4", "\\-\\hspace{0.5cm}Wave 5", "\\-\\hspace{0.5cm}Wave 6", "\\-\\hspace{0.5cm}Wave 7"), df = F, align=T, header=F, dep.var.labels=c("Social Distance", "Dictator Game"), single.row = T, title="Random Effects Within-Between Models", table.placement = "H", out.header = F, report='vc*s')



### generate Tables A.7 and A.8

# fit models without random slopes (random intercepts only)
mod1_sd_comp <- lmer(sd ~ th_between + th_within + (1 | uniqueid:partyname) + partyname,  data=d_sd, control = lmerControl(optimizer="Nelder_Mead"))
mod1_dg_comp <- lmer(dg ~ th_between + th_within + (1 | uniqueid:partyname) + partyname,  data=d_dg, control = lmerControl(optimizer="Nelder_Mead"))

# run likelihood ratio test comparing the previous random slopes model and the random intercepts model
anova(mod1_sd_comp, mod1_sd) -> sd_anova
anova(mod1_dg_comp, mod1_dg) -> dg_anova

# print Table A.7
sd_anova %>% kable(format = "latex")
# print Table A.8
dg_anova %>% kable(format = "latex")


### generate tables A.5 and A.6 in SI

# recreate data with demeaned variables, removing observations without voting information
d <- cbind(
  df_dyadic,
  demean(df_dyadic, select = c("th", "sd", "dg"), group = "dyadid") # from package "parameters"
) %>% filter(!is.na(w3first_vc_str))

# remove in-partisans
d$remove <- F
d$remove[d$partyname == "Likud" & d$w3first_vc_str == "likud"] <- T
d$remove[d$partyname == "Kahol Lavan" & d$w3first_vc_str == "kahol_lavan"] <- T
d$remove[d$partyname == "Haavoda-Gesher-Meretz" & d$w3first_vc_str == "avoda"] <- T
d$remove[d$partyname == "Meretz" & d$w3first_vc_str == "meretz"] <- T
d$remove[d$partyname == "Haavoda-Gesher-Meretz" & d$w3first_vc_str == "meretz"] <- T
d$remove[d$partyname == "Haavoda-Gesher-Meretz" & d$w3first_vc_str == "gesher"] <- T

d$remove[d$partyname == "Hamahane Hademokrati" & d$w3first_vc_str == "meretz"] <- T
d$remove[d$partyname == "Yahadut Hatorah" & d$w3first_vc_str == "yahadut_hatora"] <- T
d$remove[d$partyname == "Balad-Raam" & d$w3first_vc_str == "balad_raam"] <- T

d$remove[d$partyname == "Hareshima Hameshutefet" & d$w3first_vc_str == "balad_raam"] <- T
d$remove[d$partyname == "Hareshima Hameshutefet" & d$w3first_vc_str == "hadash_taal"] <- T

# create dataframes for out-partisans and in-partisans
d_nonpartisan <- d %>% filter(!remove)
d_partisan <- d %>% filter(remove)

d_sd_nonpartisan <- d_nonpartisan %>% filter(partyname %in% parties) %>% 
  filter(!is.na(sd), !is.na(th_within), !is.na(th_between))
d_dg_nonpartisan <- d_nonpartisan %>% filter(partyname %in% parties) %>% 
  filter(!is.na(dg), !is.na(th_within), !is.na(th_between))

d_sd_partisan <- d_partisan %>% filter(partyname %in% parties) %>% 
  filter(!is.na(sd), !is.na(th_within), !is.na(th_between))
d_dg_partisan <- d_partisan %>% filter(partyname %in% parties) %>% 
  filter(!is.na(dg), !is.na(th_within), !is.na(th_between))

# rerun models on outpartisan and inpartisan dataframes
mod1_sd_nonpartisan <- lmer(sd ~ th_between + th_within + (1 + th_within | uniqueid:partyname) + partyname,  data=d_sd_nonpartisan, control = lmerControl(optimizer="Nelder_Mead"))
mod1_dg_nonpartisan <- lmer(dg ~ th_between + th_within + (1 + th_within  | uniqueid:partyname) + partyname,  data=d_dg_nonpartisan, control = lmerControl(optimizer="Nelder_Mead"))
mod2_sd_nonpartisan <- lmer(sd ~ th_between + th_within + (1 + th_within  | uniqueid:partyname) + partyname + as.factor(wave),  data=d_sd_nonpartisan, control = lmerControl(optimizer="Nelder_Mead"))
mod2_dg_nonpartisan <- lmer(dg ~ th_between + th_within + (1 + th_within  | uniqueid:partyname) + partyname + as.factor(wave),  data=d_dg_nonpartisan, control = lmerControl(optimizer="Nelder_Mead"))


mod1_sd_partisan <- lmer(sd ~ th_between + th_within + (1 + th_within | uniqueid:partyname) + partyname,  data=d_sd_partisan, control = lmerControl(optimizer="Nelder_Mead"))
mod1_dg_partisan <- lmer(dg ~ th_between + th_within + (1 + th_within  | uniqueid:partyname) + partyname,  data=d_dg_partisan, control = lmerControl(optimizer="Nelder_Mead"))
mod2_sd_partisan <- lmer(sd ~ th_between + th_within + (1 + th_within  | uniqueid:partyname) + partyname + as.factor(wave),  data=d_sd_partisan, control = lmerControl(optimizer="Nelder_Mead"))
mod2_dg_partisan <- lmer(dg ~ th_between + th_within + (1 + th_within  | uniqueid:partyname) + partyname + as.factor(wave),  data=d_dg_partisan, control = lmerControl(optimizer="Nelder_Mead"))

# print Table A.5
stargazer(mod1_sd_nonpartisan, mod2_sd_nonpartisan, mod1_dg_nonpartisan, mod2_dg_nonpartisan, covariate.labels = c("Thermometer \\textsubscript{BETWEEN}", "Thermometer \\textsubscript{WITHIN}", 
                                                                                                                   "Party [ref: Balad-Raam] \\\\ \\-\\hspace{0.5cm}Haavoda-Gesher-Meretz", "\\-\\hspace{0.5cm}Hamahane Hademokrati", "\\-\\hspace{0.5cm}Hareshima Hameshutefet", "\\-\\hspace{0.5cm}Kahol Lavan", "\\-\\hspace{0.5cm}Likud", "\\-\\hspace{0.5cm}Meretz", "\\-\\hspace{0.5cm}Yahadut Hatorah", 
                                                                                                                   "Wave [ref: Wave 1] \\\\ \\-\\hspace{0.5cm}Wave 2", "\\-\\hspace{0.5cm}Wave 3", "\\-\\hspace{0.5cm}Wave 4", "\\-\\hspace{0.5cm}Wave 5", "\\-\\hspace{0.5cm}Wave 6", "\\-\\hspace{0.5cm}Wave 7"), df = F, align=T, header=F, dep.var.labels=c("Social Distance", "Dictator Game"), single.row = T, title="Random Effects Within-Between Models, Non-partisans", table.placement = "H", out.header = F, report=('vc*s'))

# print Table A.6
stargazer(mod1_sd_partisan, mod2_sd_partisan, mod1_dg_partisan, mod2_dg_partisan, covariate.labels = c("Thermometer \\textsubscript{BETWEEN}", "Thermometer \\textsubscript{WITHIN}", 
                                                                                                       "Party [ref: Balad-Raam] \\\\ \\-\\hspace{0.5cm}Haavoda-Gesher-Meretz", "\\-\\hspace{0.5cm}Hamahane Hademokrati", "\\-\\hspace{0.5cm}Hareshima Hameshutefet", "\\-\\hspace{0.5cm}Kahol Lavan", "\\-\\hspace{0.5cm}Likud", "\\-\\hspace{0.5cm}Meretz", "\\-\\hspace{0.5cm}Yahadut Hatorah", 
                                                                                                       "Wave [ref: Wave 1] \\\\ \\-\\hspace{0.5cm}Wave 2", "\\-\\hspace{0.5cm}Wave 3", "\\-\\hspace{0.5cm}Wave 4", "\\-\\hspace{0.5cm}Wave 5", "\\-\\hspace{0.5cm}Wave 6", "\\-\\hspace{0.5cm}Wave 7"), df = F, align=T, header=F, dep.var.labels=c("Social Distance", "Dictator Game"), single.row = T, title="Random Effects Within-Between Models, Partisans", table.placement = "H", out.header = F, report=('vc*s'))

##### create Figure 5 - whisker graph

sd_all <- left_join(add_rownames(as.data.frame(confint(mod1_sd, method="Wald"))),
                    tidy(mod1_sd),
                    by=c("rowname"="term")) %>% 
  mutate(group = "All", model = "SD") %>% select(lower = `2.5 %`, upper = `97.5 %`, estimate, term=rowname, group, model) %>% filter(term %in% c("th_between", "th_within"))

dg_all <- left_join(add_rownames(as.data.frame(confint(mod1_dg, method="Wald"))),
                    tidy(mod1_dg),
                    by=c("rowname"="term")) %>% 
  mutate(group = "All", model = "DG") %>% select(lower = `2.5 %`, upper = `97.5 %`, estimate, term=rowname, group, model) %>% filter(term %in% c("th_between", "th_within"))

sd_partisan <- left_join(add_rownames(as.data.frame(confint(mod1_sd_partisan, method="Wald"))),
                         tidy(mod1_sd_partisan),
                         by=c("rowname"="term")) %>% 
  mutate(group = "In-partisans", model = "SD") %>% select(lower = `2.5 %`, upper = `97.5 %`, estimate, term=rowname, group, model) %>% filter(term %in% c("th_between", "th_within"))

dg_partisan <- left_join(add_rownames(as.data.frame(confint(mod1_dg_partisan, method="Wald"))),
                         tidy(mod1_dg_partisan),
                         by=c("rowname"="term")) %>% 
  mutate(group = "In-partisans", model = "DG") %>% select(lower = `2.5 %`, upper = `97.5 %`, estimate, term=rowname, group, model) %>% filter(term %in% c("th_between", "th_within"))

sd_outpartisan <- left_join(add_rownames(as.data.frame(confint(mod1_sd_nonpartisan, method="Wald"))),
                            tidy(mod1_sd_nonpartisan),
                            by=c("rowname"="term")) %>% 
  mutate(group = "Out-partisans", model = "SD") %>% select(lower = `2.5 %`, upper = `97.5 %`, estimate, term=rowname, group, model) %>% filter(term %in% c("th_between", "th_within"))

dg_outpartisan <- left_join(add_rownames(as.data.frame(confint(mod1_dg_nonpartisan, method="Wald"))),
                            tidy(mod1_dg_nonpartisan),
                            by=c("rowname"="term")) %>% 
  mutate(group = "Out-partisans", model = "DG") %>% select(lower = `2.5 %`, upper = `97.5 %`, estimate, term=rowname, group, model) %>% filter(term %in% c("th_between", "th_within"))

dotwhisker <- bind_rows(sd_all, dg_all, sd_partisan, dg_partisan, sd_outpartisan,
                        dg_outpartisan)


# plotting aids

add_brackets <- function(p, brackets, face = "italic") {
  y_ind <- term <- estimate <- ymax <- ymin <- x <- NULL # not functional, just for CRAN check
  
  coef_layer <- 0
  repeat {
    coef_layer <- coef_layer + 1
    if ("x" %in% names(layer_data(p, i = coef_layer))) break
  }
  
  if (p$args$style == "dotwhisker") {
    pd <- left_join(p$data %>% mutate(xx = signif(estimate, 9)),
                    layer_data(p, i = coef_layer) %>% mutate(xx = signif(x, 9)), by = "xx") %>%
      left_join(layer_data(p, i = coef_layer - 1),
                by = c("colour", "y", "group", "PANEL", "ymin", "ymax", "xmax", "size", "alpha"))
  } else {
    pd <- left_join(p$data %>% mutate(xx = signif(estimate, 9)),
                    layer_data(p, i = coef_layer) %>% mutate(xx = signif(x, 9)), by = "xx")
    pd <- pd %>%
      mutate(ymin = y_ind,
             ymax = y_ind)
  }
  overhang <- max(pd$y_ind)/30
  overhang <- ifelse(overhang > .23, .23, overhang)
  farout <- ifelse(p$args$style == "distribution", max(pd$x, na.rm = TRUE) + 100, max(pd$xmax, na.rm = TRUE) + 100)
  p1 <- p + theme(plot.margin = unit(c(1, 1, 1, -1), "lines")) + ylab("")
  
  if (!is.list(brackets)) stop('Error: argument "brackets" is not a list')
  
  draw_bracket_label <- function(x, f = face) {
    top <- pd[which((pd$term == x[2] | pd$term == x[3]) & !is.na(pd$estimate)), "ymax"] %>% max()
    bottom <- pd[which((pd$term == x[2] | pd$term == x[3]) & !is.na(pd$estimate)), "ymin"] %>% min()
    shift <- max(abs(top - round(top)), abs(round(bottom) - bottom))
    top <- round(top) + shift
    bottom <- round(bottom) - shift
    
    annotation_custom(
      grob = grid::textGrob(label = x[1], gp = grid::gpar(cex = 1.2, fontface = f), rot = 90),
      xmin = farout, xmax = farout,
      ymin = (top + bottom)/2, ymax = (top + bottom)/2)
  }
  
  draw_bracket_vert <- function(x, oh = overhang) {
    top <- pd[which((pd$term == x[2] | pd$term == x[3]) & !is.na(pd$estimate)), "ymax"] %>% max()
    bottom <- pd[which((pd$term == x[2] | pd$term == x[3]) & !is.na(pd$estimate)), "ymin"] %>% min()
    shift <- min(max(abs(top - round(top)), abs(round(bottom) - bottom)) + oh, .45)
    top <- round(top) + shift
    bottom <- round(bottom) - shift
    
    annotation_custom(grob = grid::linesGrob(),
                      xmin = farout + 0.5, xmax = farout + 0.5,
                      ymin = bottom, ymax = top)
  }
  
  draw_bracket_top <- function(x, oh = overhang) {
    top <- pd[which((pd$term == x[2] | pd$term == x[3]) & !is.na(pd$estimate)), "ymax"] %>% max()
    bottom <- pd[which((pd$term == x[2] | pd$term == x[3]) & !is.na(pd$estimate)), "ymin"] %>% min()
    shift <- min(max(abs(top - round(top)), abs(round(bottom) - bottom)) + oh, .45)
    top <- round(top) + shift
    bottom <- round(bottom) - shift
    
    annotation_custom(grob = grid::linesGrob(),
                      xmin = farout + 0.5, farout + 1,
                      ymin = top, ymax = top)
  }
  
  draw_bracket_bottom <- function(x, oh = overhang) {
    top <- pd[which((pd$term == x[2] | pd$term == x[3]) & !is.na(pd$estimate)), "ymax"] %>% max()
    bottom <- pd[which((pd$term == x[2] | pd$term == x[3]) & !is.na(pd$estimate)), "ymin"] %>% min()
    shift <- min(max(abs(top - round(top)), abs(round(bottom) - bottom)) + oh, .45)
    top <- round(top) + shift
    bottom <- round(bottom) - shift
    
    annotation_custom(grob = grid::linesGrob(), xmin = farout + 0.5, xmax = farout + 1,
                      ymin = bottom, ymax = bottom)
  }
  
  p2 <- p1 +
    theme_bw() +
    theme(plot.title = element_text(colour = NA),
          axis.title.y = element_blank(),
          axis.title.x = element_text(colour = NA),
          axis.text.y = element_blank(),
          axis.text.x = element_text(colour = NA),
          panel.grid.major = element_blank(),
          panel.grid.minor = element_blank(),
          axis.ticks = element_line(colour = NA),
          panel.border = element_rect(colour = NA),
          strip.background = element_rect(colour = NA, fill = NA),
          strip.text.x = element_text(colour = NA),
          plot.margin = unit(c(1,0,2,0), "lines"),
          legend.background = element_rect(colour = NA),
          legend.key = element_rect(colour = NA),
          legend.text = element_text(colour = NA)) +
    coord_cartesian(xlim = c(farout - 1, farout + 1)) +
    theme(legend.position = "none")
  
  for (i in seq(length(brackets))) {
    p2 <- p2 +
      draw_bracket_label(brackets[[i]]) +
      draw_bracket_vert(brackets[[i]]) +
      draw_bracket_top(brackets[[i]]) +
      draw_bracket_bottom(brackets[[i]])
  }
  
  plots <- list(p2, p1)
  grobs <- lapply(plots, function(x) ggplotGrob(x))
  max_heights <- list(do.call(grid::unit.pmax, lapply(grobs, function(x) x$heights)))
  grobs[[1]]$heights <- max_heights[[1]]
  grobs[[2]]$heights <- max_heights[[1]]
  
  pp <- ggplot(data.frame(x = 0:1, y = 0:1), aes_string(x = "x", y = "y")) +
    scale_x_continuous(limits = c(0, 1), expand = c(0, 0)) +
    scale_y_continuous(limits = c(0, 1), expand = c(0, 0)) +
    theme_void() +
    labs(x = NULL, y = NULL) +
    draw_grob(grobs[[1]], 0, 1/9) +
    draw_grob(grobs[[2]], 1/9, 8/9)
  
  return(pp)
}

draw_grob <- function(grob, x, width) {
  layer(data = data.frame(x = NA),
        stat = StatIdentity,
        position = PositionIdentity,
        geom = GeomCustomAnn,
        inherit.aes = FALSE,
        params = list(grob = grob,
                      xmin = x,
                      xmax = width,
                      ymin = 0,
                      ymax = 1))
}

# generate Figure 5
{dotwhisker %>% mutate(term = paste0(model, term), model = group) %>% select(-group) %>% rename(conf.low = lower, conf.high =upper) %>%
    dwplot(dot_args = list("size" = 2.8), whisker_args = list("size" = 1)) + scale_y_discrete(labels = c(expression(Thermometer[Within]), 
                                                                                                         expression(Thermometer[Between]), expression(Thermometer[Within]), expression(Thermometer[Between]))) +
    theme_light() + scale_color_brewer(palette="Set1") + theme(legend.position = "bottom", legend.background = element_rect(colour = NA, fill = NA), text = element_text(family = "Decima WE", color = "grey20", 
                                                                                                                                                                         size = 17)) + labs(color = "Subset") + 
    geom_vline(xintercept = 0, linetype="dashed") + scale_x_continuous(breaks = seq(0, 4.5, 0.5))} %>%
  add_brackets(list(c("Social Distance", "SDth_between", "SDth_within"), c("Dictator Game", "DGth_between", "DGth_within")), face="bold")


## generate Table A.9 - additional controls
d_sd$income <- factor(d_sd$income, levels = c("no_income_at_all", "very_below_average", "below_average",
                                              "average", "above_average", "very_above_average", 
                                              "refuse_to_answer"))
d_dg$income <- factor(d_dg$income, levels = c("no_income_at_all", "very_below_average", "below_average",
                                              "average", "above_average", "very_above_average", 
                                              "refuse_to_answer"))


mod2_sd_controls <- lmer(sd ~ th_between + th_within + (1 + th_within  | uniqueid:partyname) + partyname + as.factor(wave) + sex + rel + migzar + income + left_right_1,  data=d_sd, control = lmerControl(optimizer="Nelder_Mead"))

mod2_dg_controls <- lmer(dg ~ th_between + th_within + (1 + th_within  | uniqueid:partyname) + partyname + as.factor(wave) + sex + rel + migzar + income + left_right_1,  data=d_dg, control = lmerControl(optimizer="Nelder_Mead"))


stargazer(mod2_sd_controls, mod2_dg_controls, covariate.labels = c("Thermometer \\textsubscript{BETWEEN}", "Thermometer \\textsubscript{WITHIN}", 
                                                                   "Party [ref: Balad-Raam] \\\\ \\-\\hspace{0.5cm}Haavoda-Gesher-Meretz", "\\-\\hspace{0.5cm}Hamahane Hademokrati", "\\-\\hspace{0.5cm}Hareshima Hameshutefet", "\\-\\hspace{0.5cm}Kahol Lavan", "\\-\\hspace{0.5cm}Likud", "\\-\\hspace{0.5cm}Meretz", "\\-\\hspace{0.5cm}Yahadut Hatorah", 
                                                                   "Wave [ref: Wave 1] \\\\ \\-\\hspace{0.5cm}Wave 2", "\\-\\hspace{0.5cm}Wave 3", "\\-\\hspace{0.5cm}Wave 4", "\\-\\hspace{0.5cm}Wave 5", "\\-\\hspace{0.5cm}Wave 6", "\\-\\hspace{0.5cm}Wave 7", "Male", "Religion [ref: Christian] \\\\ \\-\\hspace{0.5cm}Druze",
                                                                   "\\-\\hspace{0.5cm}Jewish", "\\-\\hspace{0.5cm}Muslim",
                                                                   "\\-\\hspace{0.5cm}No religion", "\\-\\hspace{0.5cm}Other",
                                                                   "Religious sector [ref: Dati/Religious] \\\\ \\-\\hspace{0.5cm}Haredi/Ultra-Orthodox",  "\\-\\hspace{0.5cm}Secular",  "\\-\\hspace{0.5cm}Masorti/Traditional", "Income [ref: No income at all] \\\\ \\-\\hspace{0.5cm}Very below averge",
                                                                   "\\-\\hspace{0.5cm}Below average", "\\-\\hspace{0.5cm}Average", "\\-\\hspace{0.5cm}Above average", "\\-\\hspace{0.5cm}Very above average", "\\-\\hspace{0.5cm}Refuse to answer", "Left-Right placement"
), df = F, align=T, header=F, dep.var.labels=c("Social Distance", "Dictator Game"), single.row = T, title="Random Effects Within-Between Models, Additional Controls", table.placement = "H", out.header = F, report=('vc*s'))



### Figure 6
### read published IPP dataset

df <- read_csv("IPP_wide_25_09_22.csv")

pdf <- bind_rows(tibble(lr = df$w1_left_right, party = df$w1_party_vote_intention),
                 tibble(lr = df$w2_left_right, party = df$w2_party_vote_intention),
                 tibble(lr = df$w5_left_right, party = df$w5_vote_choice),
                 tibble(lr = df$w6_left_right, party = df$w6_vote_choice),
                 tibble(lr = df$w7_left_right, party = df$w7_vote_choice)) %>%
  group_by(party) %>%
  summarize(lr = mean(lr, na.rm=T)) %>%
  arrange(lr) %>%
  filter(!is.na(party)) %>%
  filter(!party %in% (c("Other party", "Undecided", "I don't intend to vote", "Blank vote", "Other/Don't Know")))

pmean <- mean(pdf$lr)
pdf <- pdf %>% mutate(bloc = ifelse(lr > pmean, "Right", "Left"))

tdf <- df %>%  select(matches("w[1234567]_thermometer|w[1234567]_sd_.*Wingers|userId"))  %>%
  pivot_longer(cols = contains("thermometer"), names_to = c("wave", "party"), 
               names_pattern = "w(\\d+)_thermometer_(.*)")



tdf$sd_left_wingers <- tdf %>% select(contains("Left-Wingers")) %>% apply(1, mean, na.rm=T)
tdf$sd_right_wingers <- tdf %>% select(contains("Right-Wingers")) %>% apply(1, mean, na.rm=T)

tdf <- tdf %>% left_join(pdf %>% select(party, bloc))


otdf <- tdf %>% group_by(userId, bloc) %>%
  summarize(th = mean(value, na.rm=T), sd_right_wingers = mean(sd_right_wingers),
            sd_left_wingers = mean(sd_left_wingers)) %>%
  filter(!is.na(bloc))

# Exclude middle parties
otdf_nomiddle <- tdf %>% filter(party != "Haavoda-Gesher", party != "Kahol-Lavan", party != "Gesher", party != "Kulanu") %>%
  group_by(userId, bloc) %>%
  summarize(th = mean(value, na.rm=T), sd_right_wingers = mean(sd_right_wingers),
            sd_left_wingers = mean(sd_left_wingers)) %>%
  filter(!is.na(bloc))


mod_left <- lm(data = otdf %>% filter(bloc == "Left"), th ~ sd_left_wingers)
mod_right <- lm(data = otdf %>% filter(bloc == "Right"), th ~ sd_right_wingers)
mod_right_nomiddle <- lm(data = otdf_nomiddle %>% filter(bloc == "Right"), th ~ sd_right_wingers)
mod_left_nomiddle <- lm(data = otdf_nomiddle %>% filter(bloc == "Left"), th ~ sd_left_wingers)


dl <- get_model_data(mod_left, type="pred")$sd_left_wingers %>% as.data.frame() %>% mutate(subset = "Left Wing")
dlm <- get_model_data(mod_left_nomiddle, type="pred")$sd_left_wingers %>% as.data.frame() %>% mutate(subset = "Left Wing, Reduced")
dr <- get_model_data(mod_right, type="pred")$sd_right_wingers %>% as.data.frame() %>% mutate(subset = "Right Wing")
drm <- get_model_data(mod_right_nomiddle, type="pred")$sd_right_wingers %>% as.data.frame() %>% mutate(subset = "Right Wing, Reduced")

ggplot(bind_rows(dl, dr), aes(x, predicted)) + geom_line(aes(color = subset)) +
  geom_ribbon(aes(ymin = conf.low, ymax = conf.high, fill = subset), alpha=0.1) +
  #ggtitle("Predicted thermometer score by social distance from bloc's voters") +
  ylab("Predicted thermometer score") + xlab("Social distance value") + theme_light() + 
  theme(text = element_text(size=20)) +
  labs(fill = "Subset", color = "Subset")

ggplot(bind_rows(dl, dlm, dr, drm), aes(x, predicted)) + geom_line(aes(color = subset)) +
  geom_ribbon(aes(ymin = conf.low, ymax = conf.high, fill = subset), alpha=0.1) +
  #ggtitle("Predicted thermometer score by social distance from bloc's voters") +
  ylab("Predicted thermometer score") + xlab("Social distance value") + theme_light() + 
  theme(text = element_text(size=20)) +
  labs(fill = "Subset", color = "Subset")

### SD graph

# generate long version of IPP

df_long <- pivot_longer(df, cols = matches("^w\\d+_(thermometer|sd|dg)"), names_to = c("wave", "var", "party"),
                        names_pattern = "w(\\d+)_(.*)_p?(.*)")


df_sd <- df_long %>% filter(var == "sd")
df_th <- df_long %>% filter(var == "thermometer")
df_dg <- df_long %>% filter(var == "dg")

df_th %<>% rename(th = value) %>% select(-var)
df_sd %<>% rename(sd = value) %>% select(-var)
df_dg %<>% rename(dg = value) %>% select(-var)

df_dyadic <- left_join(df_th, df_sd)
df_dyadic <- left_join(df_dyadic, df_dg)

df_dyadic %<>% filter(!is.na(th) | !is.na(sd) | !is.na(dg))

df_dyadic <- df_dyadic %>% relocate(userId, party, wave, th, sd, dg)


totalmean <- df_dyadic %>% filter(wave < 8) %>% group_by(userId, party) %>%
  summarize(sd = sd(th, na.rm=T)) %>% group_by(userId) %>%
  summarize(meansd = mean(sd, na.rm=T)) %>%
  summarize(meansd = mean(meansd, na.rm=T))

# generate Figure 1 

df_dyadic %>% filter(wave < 8) %>% group_by(userId, party) %>%
  summarize(sd = sd(th, na.rm=T)) %>% group_by(userId) %>%
  summarize(meansd = mean(sd, na.rm=T)) %>%
  ggplot(aes(meansd)) + geom_histogram() + theme_light() + xlab("Standard Deviation") + ylab("Count") +
  #ggtitle("Mean individual standard deviation in thermometer values") +
  geom_vline(xintercept = pull(totalmean)) + theme(text = element_text(size=20)) +
  scale_x_continuous(breaks = 0:6)


##### REPLICATION FILE 
##### Validating the feeling thermometer as a measure of partisan affect in multi-party systems 
##### Noam Gidron, Lior Sheffer, Guy Mor 
##### Electoral Studies 
#######################################################################################################
##### This is the replication masterfile producing the analyses in the article. The primary analyses 
##### utilize a draft version of the The Israel Polarization Panel Dataset, 2019–2021 (IPP). Some 
##### additional analyses utilize the published version of the IPP, available here:
##### https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/A6AA4X&version=1.1
#######################################################################################################
##### Data for the textual analyses is loaded here after pre-processing and manual fixes of the 
##### lemmatization of the Hebrew text and machine translation of tokens into English.
##### For the code used for text processing, see text_preprocessing.py 
#######################################################################################################


library(readr)
library(dplyr)
library(ggplot2)
library(magrittr)
library(kableExtra)
library(stargazer)
library(gridExtra)
library(lme4)
library(parameters)
library(grid)
library(tidyr)
library(forcats)
library(dotwhisker)
library(texreg)
library(broom.mixed)
library(sjPlot)

#### TEXT ANALYSIS PLOTS

### read processed files after manual fixes to lemmatization and English translation. See .py file for pre-processing information.

likud_stopwords_rem <- read_csv("likud_elasticnet_predictivewords_w_sentiment_stopwords_removed.csv")
kahollavan_stopwords_rem <- read_csv("kahol_lavan_elasticnet_predictivewords_w_sentiment_stopwords_removed.csv")
avoda_stopwords_rem <- read_csv("avoda_elasticnet_predictivewords_w_sentiment_stopwords_removed.csv")
aguda_stopwords_rem <-  read_csv("aguda_elasticnet_predictivewords_w_sentiment_stopwords_removed.csv")
jointlist_stopwords_rem <- read_csv("joint_list_elasticnet_predictivewords_w_sentiment_stopwords_removed.csv")

likud_pos <- likud_stopwords_rem %>% top_n(20, weight) %>% mutate(english_y = paste0(english_y, " (", round(sentiwords_y, 2), ")")) %>% pull(english_y)
likud_neg <- likud_stopwords_rem %>% top_n(20, -weight) %>% mutate(english_y = paste0(english_y, " (", round(sentiwords_y, 2), ")")) %>% pull(english_y)

kh_pos <- kahollavan_stopwords_rem %>% top_n(20, weight) %>% mutate(english_y = paste0(english_y, " (", round(sentiwords_y, 2), ")")) %>% pull(english_y)
kh_neg <- kahollavan_stopwords_rem %>% top_n(20, -weight) %>% mutate(english_y = paste0(english_y, " (", round(sentiwords_y, 2), ")")) %>% pull(english_y)

joint_pos <- jointlist_stopwords_rem %>% top_n(20, weight)  %>% mutate(english_y = paste0(english_y, " (", round(sentiwords_y, 2), ")")) %>% pull(english_y)
joint_neg <- jointlist_stopwords_rem %>% top_n(20, -weight)  %>% mutate(english_y = paste0(english_y, " (", round(sentiwords_y, 2), ")")) %>% pull(english_y)

aguda_pos <- aguda_stopwords_rem %>% top_n(20, weight)  %>% mutate(english_y = paste0(english_y, " (", round(sentiwords_y, 2), ")")) %>% pull(english_y)
aguda_neg <- aguda_stopwords_rem %>% top_n(20, -weight)  %>% mutate(english_y = paste0(english_y, " (", round(sentiwords_y, 2), ")")) %>% pull(english_y)

avoda_pos <- avoda_stopwords_rem %>% top_n(20, weight)  %>% mutate(english_y = paste0(english_y, " (", round(sentiwords_y, 2), ")")) %>% pull(english_y)
avoda_neg <- avoda_stopwords_rem %>% top_n(20, -weight)  %>% mutate(english_y = paste0(english_y, " (", round(sentiwords_y, 2), ")")) %>% pull(english_y)

# generate Table 1
data.frame(likud_pos, likud_neg,  kh_pos, kh_neg, joint_pos,
           joint_neg, aguda_pos, aguda_neg,avoda_pos, avoda_neg) %>% kable(format = "latex", booktabs=T)

## read output files prior to stopword removal - for analyses in Table 2 and Figure 1

likud <- read_csv("likud_elasticnet_predictivewords_w_sentiment.csv")
kahollavan<- read_csv("kahol_lavan_elasticnet_predictivewords_w_sentiment.csv")
avoda <- read_csv("avoda_elasticnet_predictivewords_w_sentiment.csv")
aguda <-  read_csv("aguda_elasticnet_predictivewords_w_sentiment.csv")
jointlist <- read_csv("joint_list_elasticnet_predictivewords_w_sentiment.csv")

# generate Table 2


m1 <- lm(data=likud, weight ~ sentiwords_y)
m2 <- lm(data=kahollavan, weight ~ sentiwords_y)
m3 <- lm(data=jointlist, weight ~ sentiwords_y)
m4 <- lm(data=aguda, weight ~ sentiwords_y)
m5 <- lm(data=avoda, weight ~ sentiwords_y)


stargazer(m1, m2, m3, m4, m5, type="latex", df = F, align=T, header=F,
          column.labels = c("Likud", "Kahol Lavan", "Joint List", "Aguda", "Avoda"),
          dep.var.labels=c("Predictive weight"),
          covariate.labels = c("Sentiment"), single.row = F, 
          title="Word predictive weight by sentiment polarity",
          table.placement = "H", out.header = F, report=('vcs*'))


##### generate Figure 1
## generate top panel

# plot_label is used to suppress label information for some observations for better readability


sentiwords_q1 <- as.numeric(quantile(likud$sentiwords_y, 0.01))
sentiwords_q2 <- quantile(likud$sentiwords_y, 0.99) %>% as.numeric()

weight_q1 <- quantile(likud$weight, 0.1) %>% as.numeric()
weight_q2 <- quantile(likud$weight, 0.9) %>% as.numeric()

likud$plot_label <- T
likud$plot_label[likud$sentiwords_y == 0] <-F

likud <- likud %>% mutate(plot_label = ifelse((weight > weight_q1) & (weight < weight_q2) & (sentiwords_y > 
                                                                                               sentiwords_q1) & (sentiwords_y < sentiwords_q2),
                                              F, plot_label))



likud_plot <- ggplot(likud, aes(weight, sentiwords_y, color=sentiwords_y)) + 
  geom_text(aes(label = ifelse(plot_label, english_y, "")), size = 5) +
  geom_smooth(method="lm", color = "saddlebrown") + theme_dark() + scale_color_distiller(palette = "RdYlGn", direction = 1) +
  geom_point(data = likud %>% filter(!plot_label), alpha=0.2) + xlab("Predictive weight") + 
  ylab("Sentiment score") +
  labs(color = "Sentiment \npolarity") + ggtitle("Likud") + 
  theme(
    text = element_text(color = "grey20"),
    plot.caption = element_text(hjust = 0, face = "italic")) +
  theme(plot.title = element_text(hjust = 0.5, family = "Decima WE"))



## generate bottom panel


sentiwords_q1 <- as.numeric(quantile(kahollavan$sentiwords_y, 0.01))
sentiwords_q2 <- quantile(kahollavan$sentiwords_y, 0.99) %>% as.numeric()

weight_q1 <- quantile(kahollavan$weight, 0.05) %>% as.numeric()
weight_q2 <- quantile(kahollavan$weight, 0.95) %>% as.numeric()

kahollavan$plot_label <- T
kahollavan$plot_label[kahollavan$sentiwords_y == 0] <-F

kahollavan <- kahollavan %>% mutate(plot_label = ifelse((weight > weight_q1) & (weight < weight_q2) & (sentiwords_y > 
                                                                                                         sentiwords_q1) & (sentiwords_y < sentiwords_q2),
                                                        F, plot_label))


kh_plot <- ggplot(kahollavan, aes(weight, sentiwords_y, color=sentiwords_y)) + 
  geom_text(aes(label = ifelse(plot_label, english_y, "")), size = 4.5) +
  geom_smooth(method="lm", color = "saddlebrown") + theme_dark() + scale_color_distiller(palette = "RdYlGn", direction = 1) +
  geom_point(data = likud %>% filter(!plot_label), alpha=0.2) + xlab("Predictive weight") + 
  ylab("Sentiment score") +
  labs(color = "Sentiment \npolarity") + ggtitle("Kahol Lavan") + 
  theme(
    text = element_text(color = "grey20"),
    plot.caption = element_text(hjust = 0, face = "italic")) +
  theme(plot.title = element_text(hjust = 0.5, family = "Decima WE"))



#extract legend
#https://github.com/hadley/ggplot2/wiki/Share-a-legend-between-two-ggplot2-graphs
g_legend<-function(a.gplot){
  tmp <- ggplot_gtable(ggplot_build(a.gplot))
  leg <- which(sapply(tmp$grobs, function(x) x$name) == "guide-box")
  legend <- tmp$grobs[[leg]]
  return(legend)}

mylegend<-g_legend(likud_plot)

# generate combined graph for Figure 1
combined <- grid.arrange(arrangeGrob(likud_plot + theme(legend.position="none"),
                                     kh_plot + theme(legend.position="none"),
                                     nrow=2))

################################

### load panel data file - IPP draft

df <- read_csv("all_waves.csv")
df %<>% distinct(uniqueid, .keep_all = T)

# discard unnecessary variables
df %<>% select(uniqueid, left_right_1, sex, rel, edu, osek, ses, migzar, income, vote2015, w3first_vc_str, w5first_vc_str,
               matches("^w\\d(th|sd|dg)"))

# convert to long format
df_long <- pivot_longer(df, cols = matches("^w\\d(th|sd|dg)"), names_to = c("wave", "var", "party"),
                        names_pattern = "w(\\d)(.*)_p?(.*)")


# load party number-to-name link file
link <- read_csv("number2party.csv")

link$wave %<>% as.character()
link$num %<>% as.character()

# join long dataframe with link file
df_long <- left_join(df_long, link, by=c("wave"="wave", "var"="var", "party"="num"))

### create data in dyadic format

df_sd <- df_long %>% filter(var == "sd")
df_th <- df_long %>% filter(var == "th")
df_dg <- df_long %>% filter(var == "dg")

df_th %<>% rename(th = value) %>% select(-var, -party)
df_sd %<>% rename(sd = value) %>% select(-var, -party)
df_dg %<>% rename(dg = value) %>% select(-var, -party)


df_dyadic <- left_join(df_th, df_sd)
df_dyadic <- left_join(df_dyadic, df_dg)

df_dyadic <- df_dyadic %>% distinct()

# reverse scales for thermometer scores and dictator game scores
rev_scale <- function(x){
  (max(x, na.rm = T)) - x
}

df_dyadic$th %<>% rev_scale()
df_dyadic$dg %<>% rev_scale()

# create dyad indentifiers
df_dyadic %<>% mutate(dyadid = paste0(uniqueid,"_",partyname))

# parties to keep - remove parties with no dictator game data
parties <- df_dyadic %>% group_by(partyname) %>% summarize(m = mean(is.na(dg))) %>% filter(m != 1) %>% pull(partyname)

# add demeaned variables to the data
d <- cbind(
  df_dyadic,
  demean(df_dyadic, select = c("th", "sd", "dg"), group = "dyadid") # from package "parameters"
)

# create dataframe for social distance regression and dictator game regression, removing missing data for each
d_sd <- d %>% filter(partyname %in% parties) %>% 
  filter(!is.na(sd), !is.na(th_within), !is.na(th_between))
d_dg <- d %>% filter(partyname %in% parties) %>% 
  filter(!is.na(dg), !is.na(th_within), !is.na(th_between))

#### generate Figure 3


# plotting aid
grid_arrange_shared_legend <- function(...) {
  plots <- list(...)
  g <- ggplotGrob(plots[[1]] + theme(legend.position="bottom"))$grobs
  legend <- g[[which(sapply(g, function(x) x$name) == "guide-box")]]
  lheight <- sum(legend$height)
  grid.arrange(
    do.call(arrangeGrob, lapply(plots, function(x)
      x + theme(legend.position="none"))),
    legend,
    ncol = 1,
    heights = unit.c(unit(1, "npc") - lheight, lheight))
}


## Dot plots - figure 3

d %>% group_by(partyname, wave) %>% summarize(sd = mean(sd, na.rm=T), th = mean(th, na.rm=T)) %>% filter(!is.na(sd)) %>% ggplot(aes(th, sd)) + geom_point(aes(color=partyname), size=7) + geom_text(aes(label = wave)) + scale_color_brewer(palette="Set2") + theme_light() + labs(title = "Social distance", x = "Mean thermometer score", y = "Mean social distance score", color = "Evaluated party") +
  theme(text = element_text(family = "Decima WE", color = "grey20", size = 15), plot.title = element_text(hjust = 0.5)) -> dotssdp

d %>% group_by(partyname, wave) %>% summarize(dg = mean(dg, na.rm=T), th = mean(th, na.rm=T)) %>% filter(!is.na(dg)) %>% ggplot(aes(th, dg)) + geom_point(aes(color=partyname), size=7) + geom_text(aes(label = wave)) + scale_color_brewer(palette="Set2") + theme_light() + labs(title = "Dictator game", x = "Mean thermometer score", y = "Mean dictator game score", color = "Evaluated party") +
  theme(text = element_text(family = "Decima WE", color = "grey20", size = 15), plot.title = element_text(hjust = 0.5)) -> dotsdgp

grid_arrange_shared_legend(dotssdp, dotsdgp)

#### Generate Figure 4

### Correlation plots

df_dyadic %>% group_by(wave, partyname) %>% 
  summarize(cor = cor(th, sd, use="pairwise")) %>%
  filter(!is.na(cor)) -> corthsd

corthsd$partyname %<>% fct_relevel(c("Likud", "Kahol Lavan", "Yahadut Hatorah", "Balad-Raam",
                                     "Hareshima Hameshutefet", "Meretz", "Hamahane Hademokrati", "Haavoda-Gesher-Meretz"))

corthsd$wave %<>% as.numeric()

corthsd %>% 
  ggplot(aes(wave, cor)) + 
  geom_point(aes(group=partyname, color=partyname), size=4) +
  geom_line(aes(group=partyname, color=partyname), size = 2) +
  geom_line(data = corthsd %>% mutate(partyname = fct_collapse(partyname, "Meretz" = c("Meretz", "Hamahane Hademokrati", "Haavoda-Gesher-Meretz"),
                                                               "Balad-Raam" = c("Balad-Raam", "Hareshima Hameshutefet"))),
            aes(group=partyname, color=partyname), linetype="dashed") +
  scale_x_continuous(labels = c("March 19", "April 19 #1", "April 19 #2", "May 19", "Sep 19", "March 20", "May 20"), breaks = 1:max(corthsd$wave)) +
  #ylim(0, 0.55) +
  geom_vline(xintercept = 2.5) +
  geom_vline(xintercept = 4.5) +
  geom_vline(xintercept = 5.5) + 
  annotate("text", x = 2.35, y = 0.2, angle = 90, label = "April Elections", vjust = 1.2, parse = FALSE) +
  annotate("text", x = 4.35, y = 0.2, angle = 90, label = "September Elections", vjust = 1.2, parse = FALSE) +
  annotate("text", x = 5.35, y = 0.2, angle = 90, label = "March Elections", vjust = 1.2, parse = FALSE) +
  theme_light() +
  scale_color_brewer(palette = "Set2") +
  labs(title = "Social distance and feeling thermometer",
       colour = "Evaluated party",
       x = "",
       y = "Correlation") +
  theme(
    text = element_text(family = "Decima WE", color = "grey20", size = 15),
    plot.title = element_text(hjust = 0.5)) +
  scale_y_continuous(breaks = seq(0, 0.6, by = 0.10), limits = c(0, 0.6)) -> sdcorp

df_dyadic %>% group_by(wave, partyname) %>% 
  summarize(cor = cor(th, dg, use="pairwise")) %>%
  filter(!is.na(cor)) -> corthdg

corthdg$partyname %<>% fct_relevel(c("Likud", "Kahol Lavan", "Meretz", "Yahadut Hatorah", "Balad-Raam",
                                     "Hareshima Hameshutefet", "Hamahane Hademokrati"))

corthdg$wave %<>% as.numeric()

corthdg %>% 
  ggplot(aes(wave, cor)) + 
  geom_point(aes(group=partyname, color=partyname), size=4) +
  geom_line(aes(group=partyname, color=partyname), size = 2) +
  geom_line(data = corthdg %>% mutate(partyname = fct_collapse(partyname, "Meretz" = c("Meretz", "Hamahane Hademokrati", "Haavoda-Gesher-Meretz"),
                                                               "Balad-Raam" = c("Balad-Raam", "Hareshima Hameshutefet"))),
            aes(group=partyname, color=partyname), linetype="dashed") +
  scale_x_continuous(labels = c("March 19", "April 19 #1", "April 19 #2", "May 19", "Sep 19", "March 20", "May 20"), breaks = 1:max(corthsd$wave)) +
  #ylim(0, 0.55) +
  geom_vline(xintercept = 2.5) +
  geom_vline(xintercept = 4.5) +
  geom_vline(xintercept = 5.5) + 
  annotate("text", x = 2.35, y = 0.4, angle = 90, label = "April Elections", vjust = 1.2, parse = FALSE) +
  annotate("text", x = 4.35, y = 0.4, angle = 90, label = "September Elections", vjust = 1.2, parse = FALSE) +
  annotate("text", x = 5.35, y = 0.4, angle = 90, label = "March Elections", vjust = 1.2, parse = FALSE) +
  theme_light() +
  scale_color_brewer(palette = "Set2") +
  labs(title = "Dictator game and feeling thermometer",
       colour = "Evaluated party",
       x = "",
       y = "Correlation") +
  theme(
    text = element_text(family = "Decima WE", color = "grey20", size = 15),
    plot.title = element_text(hjust = 0.5)) +
  scale_y_continuous(breaks = seq(0, 0.6, by = 0.10), limits = c(0, 0.6)) -> dgcorp


grid_arrange_shared_legend(sdcorp, dgcorp)

### regression analyses

# without wave fixed effect
mod1_sd <- lmer(sd ~ th_between + th_within + (1 + th_within | uniqueid:partyname) + partyname,  data=d_sd, control = lmerControl(optimizer="Nelder_Mead"))
mod1_dg <- lmer(dg ~ th_between + th_within + (1 + th_within  | uniqueid:partyname) + partyname,  data=d_dg, control = lmerControl(optimizer="Nelder_Mead"))

# with wave fixed effects
mod2_sd <- lmer(sd ~ th_between + th_within + (1 + th_within  | uniqueid:partyname) + partyname + as.factor(wave),  data=d_sd, control = lmerControl(optimizer="Nelder_Mead"))
mod2_dg <- lmer(dg ~ th_between + th_within + (1 + th_within  | uniqueid:partyname) + partyname + as.factor(wave),  data=d_dg, control = lmerControl(optimizer="Nelder_Mead"))


#### generate Table 3
stargazer(mod1_sd, mod2_sd, mod1_dg, mod2_dg, df = F, single.row = F, report=('vcs*'), align=T, header=F, dep.var.labels=c("Social Distance", "Dictator Game"), title="Random Effects Within-Between Models", table.placement = "H", out.header = F, covariate.labels = c("Thermometer \\textsubscript{BETWEEN}", "Thermometer \\textsubscript{WITHIN}"),
          omit = c("wave", "partyname", "Constant"), add.lines = list(c("Controls", "\\faClose", "\\faCheck", "\\faClose", "\\faCheck")))

### generate Table A.4 in SI

stargazer(mod1_sd, mod2_sd, mod1_dg, mod2_dg, covariate.labels = c("Thermometer \\textsubscript{BETWEEN}", "Thermometer \\textsubscript{WITHIN}", 
                                                                   "Party [ref: Balad-Raam] \\\\ \\-\\hspace{0.5cm}Haavoda-Gesher-Meretz", "\\-\\hspace{0.5cm}Hamahane Hademokrati", "\\-\\hspace{0.5cm}Hareshima Hameshutefet", "\\-\\hspace{0.5cm}Kahol Lavan", "\\-\\hspace{0.5cm}Likud", "\\-\\hspace{0.5cm}Meretz", "\\-\\hspace{0.5cm}Yahadut Hatorah", 
                                                                   "Wave [ref: Wave 1] \\\\ \\-\\hspace{0.5cm}Wave 2", "\\-\\hspace{0.5cm}Wave 3", "\\-\\hspace{0.5cm}Wave 4", "\\-\\hspace{0.5cm}Wave 5", "\\-\\hspace{0.5cm}Wave 6", "\\-\\hspace{0.5cm}Wave 7"), df = F, align=T, header=F, dep.var.labels=c("Social Distance", "Dictator Game"), single.row = T, title="Random Effects Within-Between Models", table.placement = "H", out.header = F, report='vc*s')



### generate Tables A.7 and A.8

# fit models without random slopes (random intercepts only)
mod1_sd_comp <- lmer(sd ~ th_between + th_within + (1 | uniqueid:partyname) + partyname,  data=d_sd, control = lmerControl(optimizer="Nelder_Mead"))
mod1_dg_comp <- lmer(dg ~ th_between + th_within + (1 | uniqueid:partyname) + partyname,  data=d_dg, control = lmerControl(optimizer="Nelder_Mead"))

# run likelihood ratio test comparing the previous random slopes model and the random intercepts model
anova(mod1_sd_comp, mod1_sd) -> sd_anova
anova(mod1_dg_comp, mod1_dg) -> dg_anova

# print Table A.7
sd_anova %>% kable(format = "latex")
# print Table A.8
dg_anova %>% kable(format = "latex")


### generate tables A.5 and A.6 in SI

# recreate data with demeaned variables, removing observations without voting information
d <- cbind(
  df_dyadic,
  demean(df_dyadic, select = c("th", "sd", "dg"), group = "dyadid") # from package "parameters"
) %>% filter(!is.na(w3first_vc_str))

# remove in-partisans
d$remove <- F
d$remove[d$partyname == "Likud" & d$w3first_vc_str == "likud"] <- T
d$remove[d$partyname == "Kahol Lavan" & d$w3first_vc_str == "kahol_lavan"] <- T
d$remove[d$partyname == "Haavoda-Gesher-Meretz" & d$w3first_vc_str == "avoda"] <- T
d$remove[d$partyname == "Meretz" & d$w3first_vc_str == "meretz"] <- T
d$remove[d$partyname == "Haavoda-Gesher-Meretz" & d$w3first_vc_str == "meretz"] <- T
d$remove[d$partyname == "Haavoda-Gesher-Meretz" & d$w3first_vc_str == "gesher"] <- T

d$remove[d$partyname == "Hamahane Hademokrati" & d$w3first_vc_str == "meretz"] <- T
d$remove[d$partyname == "Yahadut Hatorah" & d$w3first_vc_str == "yahadut_hatora"] <- T
d$remove[d$partyname == "Balad-Raam" & d$w3first_vc_str == "balad_raam"] <- T

d$remove[d$partyname == "Hareshima Hameshutefet" & d$w3first_vc_str == "balad_raam"] <- T
d$remove[d$partyname == "Hareshima Hameshutefet" & d$w3first_vc_str == "hadash_taal"] <- T

# create dataframes for out-partisans and in-partisans
d_nonpartisan <- d %>% filter(!remove)
d_partisan <- d %>% filter(remove)

d_sd_nonpartisan <- d_nonpartisan %>% filter(partyname %in% parties) %>% 
  filter(!is.na(sd), !is.na(th_within), !is.na(th_between))
d_dg_nonpartisan <- d_nonpartisan %>% filter(partyname %in% parties) %>% 
  filter(!is.na(dg), !is.na(th_within), !is.na(th_between))

d_sd_partisan <- d_partisan %>% filter(partyname %in% parties) %>% 
  filter(!is.na(sd), !is.na(th_within), !is.na(th_between))
d_dg_partisan <- d_partisan %>% filter(partyname %in% parties) %>% 
  filter(!is.na(dg), !is.na(th_within), !is.na(th_between))

# rerun models on outpartisan and inpartisan dataframes
mod1_sd_nonpartisan <- lmer(sd ~ th_between + th_within + (1 + th_within | uniqueid:partyname) + partyname,  data=d_sd_nonpartisan, control = lmerControl(optimizer="Nelder_Mead"))
mod1_dg_nonpartisan <- lmer(dg ~ th_between + th_within + (1 + th_within  | uniqueid:partyname) + partyname,  data=d_dg_nonpartisan, control = lmerControl(optimizer="Nelder_Mead"))
mod2_sd_nonpartisan <- lmer(sd ~ th_between + th_within + (1 + th_within  | uniqueid:partyname) + partyname + as.factor(wave),  data=d_sd_nonpartisan, control = lmerControl(optimizer="Nelder_Mead"))
mod2_dg_nonpartisan <- lmer(dg ~ th_between + th_within + (1 + th_within  | uniqueid:partyname) + partyname + as.factor(wave),  data=d_dg_nonpartisan, control = lmerControl(optimizer="Nelder_Mead"))


mod1_sd_partisan <- lmer(sd ~ th_between + th_within + (1 + th_within | uniqueid:partyname) + partyname,  data=d_sd_partisan, control = lmerControl(optimizer="Nelder_Mead"))
mod1_dg_partisan <- lmer(dg ~ th_between + th_within + (1 + th_within  | uniqueid:partyname) + partyname,  data=d_dg_partisan, control = lmerControl(optimizer="Nelder_Mead"))
mod2_sd_partisan <- lmer(sd ~ th_between + th_within + (1 + th_within  | uniqueid:partyname) + partyname + as.factor(wave),  data=d_sd_partisan, control = lmerControl(optimizer="Nelder_Mead"))
mod2_dg_partisan <- lmer(dg ~ th_between + th_within + (1 + th_within  | uniqueid:partyname) + partyname + as.factor(wave),  data=d_dg_partisan, control = lmerControl(optimizer="Nelder_Mead"))

# print Table A.5
stargazer(mod1_sd_nonpartisan, mod2_sd_nonpartisan, mod1_dg_nonpartisan, mod2_dg_nonpartisan, covariate.labels = c("Thermometer \\textsubscript{BETWEEN}", "Thermometer \\textsubscript{WITHIN}", 
                                                                                                                   "Party [ref: Balad-Raam] \\\\ \\-\\hspace{0.5cm}Haavoda-Gesher-Meretz", "\\-\\hspace{0.5cm}Hamahane Hademokrati", "\\-\\hspace{0.5cm}Hareshima Hameshutefet", "\\-\\hspace{0.5cm}Kahol Lavan", "\\-\\hspace{0.5cm}Likud", "\\-\\hspace{0.5cm}Meretz", "\\-\\hspace{0.5cm}Yahadut Hatorah", 
                                                                                                                   "Wave [ref: Wave 1] \\\\ \\-\\hspace{0.5cm}Wave 2", "\\-\\hspace{0.5cm}Wave 3", "\\-\\hspace{0.5cm}Wave 4", "\\-\\hspace{0.5cm}Wave 5", "\\-\\hspace{0.5cm}Wave 6", "\\-\\hspace{0.5cm}Wave 7"), df = F, align=T, header=F, dep.var.labels=c("Social Distance", "Dictator Game"), single.row = T, title="Random Effects Within-Between Models, Non-partisans", table.placement = "H", out.header = F, report=('vc*s'))

# print Table A.6
stargazer(mod1_sd_partisan, mod2_sd_partisan, mod1_dg_partisan, mod2_dg_partisan, covariate.labels = c("Thermometer \\textsubscript{BETWEEN}", "Thermometer \\textsubscript{WITHIN}", 
                                                                                                       "Party [ref: Balad-Raam] \\\\ \\-\\hspace{0.5cm}Haavoda-Gesher-Meretz", "\\-\\hspace{0.5cm}Hamahane Hademokrati", "\\-\\hspace{0.5cm}Hareshima Hameshutefet", "\\-\\hspace{0.5cm}Kahol Lavan", "\\-\\hspace{0.5cm}Likud", "\\-\\hspace{0.5cm}Meretz", "\\-\\hspace{0.5cm}Yahadut Hatorah", 
                                                                                                       "Wave [ref: Wave 1] \\\\ \\-\\hspace{0.5cm}Wave 2", "\\-\\hspace{0.5cm}Wave 3", "\\-\\hspace{0.5cm}Wave 4", "\\-\\hspace{0.5cm}Wave 5", "\\-\\hspace{0.5cm}Wave 6", "\\-\\hspace{0.5cm}Wave 7"), df = F, align=T, header=F, dep.var.labels=c("Social Distance", "Dictator Game"), single.row = T, title="Random Effects Within-Between Models, Partisans", table.placement = "H", out.header = F, report=('vc*s'))

##### create Figure 5 - whisker graph

sd_all <- left_join(add_rownames(as.data.frame(confint(mod1_sd, method="Wald"))),
                    tidy(mod1_sd),
                    by=c("rowname"="term")) %>% 
  mutate(group = "All", model = "SD") %>% select(lower = `2.5 %`, upper = `97.5 %`, estimate, term=rowname, group, model) %>% filter(term %in% c("th_between", "th_within"))

dg_all <- left_join(add_rownames(as.data.frame(confint(mod1_dg, method="Wald"))),
                    tidy(mod1_dg),
                    by=c("rowname"="term")) %>% 
  mutate(group = "All", model = "DG") %>% select(lower = `2.5 %`, upper = `97.5 %`, estimate, term=rowname, group, model) %>% filter(term %in% c("th_between", "th_within"))

sd_partisan <- left_join(add_rownames(as.data.frame(confint(mod1_sd_partisan, method="Wald"))),
                         tidy(mod1_sd_partisan),
                         by=c("rowname"="term")) %>% 
  mutate(group = "In-partisans", model = "SD") %>% select(lower = `2.5 %`, upper = `97.5 %`, estimate, term=rowname, group, model) %>% filter(term %in% c("th_between", "th_within"))

dg_partisan <- left_join(add_rownames(as.data.frame(confint(mod1_dg_partisan, method="Wald"))),
                         tidy(mod1_dg_partisan),
                         by=c("rowname"="term")) %>% 
  mutate(group = "In-partisans", model = "DG") %>% select(lower = `2.5 %`, upper = `97.5 %`, estimate, term=rowname, group, model) %>% filter(term %in% c("th_between", "th_within"))

sd_outpartisan <- left_join(add_rownames(as.data.frame(confint(mod1_sd_nonpartisan, method="Wald"))),
                            tidy(mod1_sd_nonpartisan),
                            by=c("rowname"="term")) %>% 
  mutate(group = "Out-partisans", model = "SD") %>% select(lower = `2.5 %`, upper = `97.5 %`, estimate, term=rowname, group, model) %>% filter(term %in% c("th_between", "th_within"))

dg_outpartisan <- left_join(add_rownames(as.data.frame(confint(mod1_dg_nonpartisan, method="Wald"))),
                            tidy(mod1_dg_nonpartisan),
                            by=c("rowname"="term")) %>% 
  mutate(group = "Out-partisans", model = "DG") %>% select(lower = `2.5 %`, upper = `97.5 %`, estimate, term=rowname, group, model) %>% filter(term %in% c("th_between", "th_within"))

dotwhisker <- bind_rows(sd_all, dg_all, sd_partisan, dg_partisan, sd_outpartisan,
                        dg_outpartisan)


# plotting aids

add_brackets <- function(p, brackets, face = "italic") {
  y_ind <- term <- estimate <- ymax <- ymin <- x <- NULL # not functional, just for CRAN check
  
  coef_layer <- 0
  repeat {
    coef_layer <- coef_layer + 1
    if ("x" %in% names(layer_data(p, i = coef_layer))) break
  }
  
  if (p$args$style == "dotwhisker") {
    pd <- left_join(p$data %>% mutate(xx = signif(estimate, 9)),
                    layer_data(p, i = coef_layer) %>% mutate(xx = signif(x, 9)), by = "xx") %>%
      left_join(layer_data(p, i = coef_layer - 1),
                by = c("colour", "y", "group", "PANEL", "ymin", "ymax", "xmax", "size", "alpha"))
  } else {
    pd <- left_join(p$data %>% mutate(xx = signif(estimate, 9)),
                    layer_data(p, i = coef_layer) %>% mutate(xx = signif(x, 9)), by = "xx")
    pd <- pd %>%
      mutate(ymin = y_ind,
             ymax = y_ind)
  }
  overhang <- max(pd$y_ind)/30
  overhang <- ifelse(overhang > .23, .23, overhang)
  farout <- ifelse(p$args$style == "distribution", max(pd$x, na.rm = TRUE) + 100, max(pd$xmax, na.rm = TRUE) + 100)
  p1 <- p + theme(plot.margin = unit(c(1, 1, 1, -1), "lines")) + ylab("")
  
  if (!is.list(brackets)) stop('Error: argument "brackets" is not a list')
  
  draw_bracket_label <- function(x, f = face) {
    top <- pd[which((pd$term == x[2] | pd$term == x[3]) & !is.na(pd$estimate)), "ymax"] %>% max()
    bottom <- pd[which((pd$term == x[2] | pd$term == x[3]) & !is.na(pd$estimate)), "ymin"] %>% min()
    shift <- max(abs(top - round(top)), abs(round(bottom) - bottom))
    top <- round(top) + shift
    bottom <- round(bottom) - shift
    
    annotation_custom(
      grob = grid::textGrob(label = x[1], gp = grid::gpar(cex = 1.2, fontface = f), rot = 90),
      xmin = farout, xmax = farout,
      ymin = (top + bottom)/2, ymax = (top + bottom)/2)
  }
  
  draw_bracket_vert <- function(x, oh = overhang) {
    top <- pd[which((pd$term == x[2] | pd$term == x[3]) & !is.na(pd$estimate)), "ymax"] %>% max()
    bottom <- pd[which((pd$term == x[2] | pd$term == x[3]) & !is.na(pd$estimate)), "ymin"] %>% min()
    shift <- min(max(abs(top - round(top)), abs(round(bottom) - bottom)) + oh, .45)
    top <- round(top) + shift
    bottom <- round(bottom) - shift
    
    annotation_custom(grob = grid::linesGrob(),
                      xmin = farout + 0.5, xmax = farout + 0.5,
                      ymin = bottom, ymax = top)
  }
  
  draw_bracket_top <- function(x, oh = overhang) {
    top <- pd[which((pd$term == x[2] | pd$term == x[3]) & !is.na(pd$estimate)), "ymax"] %>% max()
    bottom <- pd[which((pd$term == x[2] | pd$term == x[3]) & !is.na(pd$estimate)), "ymin"] %>% min()
    shift <- min(max(abs(top - round(top)), abs(round(bottom) - bottom)) + oh, .45)
    top <- round(top) + shift
    bottom <- round(bottom) - shift
    
    annotation_custom(grob = grid::linesGrob(),
                      xmin = farout + 0.5, farout + 1,
                      ymin = top, ymax = top)
  }
  
  draw_bracket_bottom <- function(x, oh = overhang) {
    top <- pd[which((pd$term == x[2] | pd$term == x[3]) & !is.na(pd$estimate)), "ymax"] %>% max()
    bottom <- pd[which((pd$term == x[2] | pd$term == x[3]) & !is.na(pd$estimate)), "ymin"] %>% min()
    shift <- min(max(abs(top - round(top)), abs(round(bottom) - bottom)) + oh, .45)
    top <- round(top) + shift
    bottom <- round(bottom) - shift
    
    annotation_custom(grob = grid::linesGrob(), xmin = farout + 0.5, xmax = farout + 1,
                      ymin = bottom, ymax = bottom)
  }
  
  p2 <- p1 +
    theme_bw() +
    theme(plot.title = element_text(colour = NA),
          axis.title.y = element_blank(),
          axis.title.x = element_text(colour = NA),
          axis.text.y = element_blank(),
          axis.text.x = element_text(colour = NA),
          panel.grid.major = element_blank(),
          panel.grid.minor = element_blank(),
          axis.ticks = element_line(colour = NA),
          panel.border = element_rect(colour = NA),
          strip.background = element_rect(colour = NA, fill = NA),
          strip.text.x = element_text(colour = NA),
          plot.margin = unit(c(1,0,2,0), "lines"),
          legend.background = element_rect(colour = NA),
          legend.key = element_rect(colour = NA),
          legend.text = element_text(colour = NA)) +
    coord_cartesian(xlim = c(farout - 1, farout + 1)) +
    theme(legend.position = "none")
  
  for (i in seq(length(brackets))) {
    p2 <- p2 +
      draw_bracket_label(brackets[[i]]) +
      draw_bracket_vert(brackets[[i]]) +
      draw_bracket_top(brackets[[i]]) +
      draw_bracket_bottom(brackets[[i]])
  }
  
  plots <- list(p2, p1)
  grobs <- lapply(plots, function(x) ggplotGrob(x))
  max_heights <- list(do.call(grid::unit.pmax, lapply(grobs, function(x) x$heights)))
  grobs[[1]]$heights <- max_heights[[1]]
  grobs[[2]]$heights <- max_heights[[1]]
  
  pp <- ggplot(data.frame(x = 0:1, y = 0:1), aes_string(x = "x", y = "y")) +
    scale_x_continuous(limits = c(0, 1), expand = c(0, 0)) +
    scale_y_continuous(limits = c(0, 1), expand = c(0, 0)) +
    theme_void() +
    labs(x = NULL, y = NULL) +
    draw_grob(grobs[[1]], 0, 1/9) +
    draw_grob(grobs[[2]], 1/9, 8/9)
  
  return(pp)
}

draw_grob <- function(grob, x, width) {
  layer(data = data.frame(x = NA),
        stat = StatIdentity,
        position = PositionIdentity,
        geom = GeomCustomAnn,
        inherit.aes = FALSE,
        params = list(grob = grob,
                      xmin = x,
                      xmax = width,
                      ymin = 0,
                      ymax = 1))
}

# generate Figure 5
{dotwhisker %>% mutate(term = paste0(model, term), model = group) %>% select(-group) %>% rename(conf.low = lower, conf.high =upper) %>%
    dwplot(dot_args = list("size" = 2.8), whisker_args = list("size" = 1)) + scale_y_discrete(labels = c(expression(Thermometer[Within]), 
                                                                                                         expression(Thermometer[Between]), expression(Thermometer[Within]), expression(Thermometer[Between]))) +
    theme_light() + scale_color_brewer(palette="Set1") + theme(legend.position = "bottom", legend.background = element_rect(colour = NA, fill = NA), text = element_text(family = "Decima WE", color = "grey20", 
                                                                                                                                                                         size = 17)) + labs(color = "Subset") + 
    geom_vline(xintercept = 0, linetype="dashed") + scale_x_continuous(breaks = seq(0, 4.5, 0.5))} %>%
  add_brackets(list(c("Social Distance", "SDth_between", "SDth_within"), c("Dictator Game", "DGth_between", "DGth_within")), face="bold")


## generate Table A.9 - additional controls
d_sd$income <- factor(d_sd$income, levels = c("no_income_at_all", "very_below_average", "below_average",
                                              "average", "above_average", "very_above_average", 
                                              "refuse_to_answer"))
d_dg$income <- factor(d_dg$income, levels = c("no_income_at_all", "very_below_average", "below_average",
                                              "average", "above_average", "very_above_average", 
                                              "refuse_to_answer"))


mod2_sd_controls <- lmer(sd ~ th_between + th_within + (1 + th_within  | uniqueid:partyname) + partyname + as.factor(wave) + sex + rel + migzar + income + left_right_1,  data=d_sd, control = lmerControl(optimizer="Nelder_Mead"))

mod2_dg_controls <- lmer(dg ~ th_between + th_within + (1 + th_within  | uniqueid:partyname) + partyname + as.factor(wave) + sex + rel + migzar + income + left_right_1,  data=d_dg, control = lmerControl(optimizer="Nelder_Mead"))


stargazer(mod2_sd_controls, mod2_dg_controls, covariate.labels = c("Thermometer \\textsubscript{BETWEEN}", "Thermometer \\textsubscript{WITHIN}", 
                                                                   "Party [ref: Balad-Raam] \\\\ \\-\\hspace{0.5cm}Haavoda-Gesher-Meretz", "\\-\\hspace{0.5cm}Hamahane Hademokrati", "\\-\\hspace{0.5cm}Hareshima Hameshutefet", "\\-\\hspace{0.5cm}Kahol Lavan", "\\-\\hspace{0.5cm}Likud", "\\-\\hspace{0.5cm}Meretz", "\\-\\hspace{0.5cm}Yahadut Hatorah", 
                                                                   "Wave [ref: Wave 1] \\\\ \\-\\hspace{0.5cm}Wave 2", "\\-\\hspace{0.5cm}Wave 3", "\\-\\hspace{0.5cm}Wave 4", "\\-\\hspace{0.5cm}Wave 5", "\\-\\hspace{0.5cm}Wave 6", "\\-\\hspace{0.5cm}Wave 7", "Male", "Religion [ref: Christian] \\\\ \\-\\hspace{0.5cm}Druze",
                                                                   "\\-\\hspace{0.5cm}Jewish", "\\-\\hspace{0.5cm}Muslim",
                                                                   "\\-\\hspace{0.5cm}No religion", "\\-\\hspace{0.5cm}Other",
                                                                   "Religious sector [ref: Dati/Religious] \\\\ \\-\\hspace{0.5cm}Haredi/Ultra-Orthodox",  "\\-\\hspace{0.5cm}Secular",  "\\-\\hspace{0.5cm}Masorti/Traditional", "Income [ref: No income at all] \\\\ \\-\\hspace{0.5cm}Very below averge",
                                                                   "\\-\\hspace{0.5cm}Below average", "\\-\\hspace{0.5cm}Average", "\\-\\hspace{0.5cm}Above average", "\\-\\hspace{0.5cm}Very above average", "\\-\\hspace{0.5cm}Refuse to answer", "Left-Right placement"
), df = F, align=T, header=F, dep.var.labels=c("Social Distance", "Dictator Game"), single.row = T, title="Random Effects Within-Between Models, Additional Controls", table.placement = "H", out.header = F, report=('vc*s'))



### Figure 6
### read published IPP dataset

df <- read_csv("~/Downloads/IPP_wide_25_09_22.csv")

pdf <- bind_rows(tibble(lr = df$w1_left_right, party = df$w1_party_vote_intention),
                 tibble(lr = df$w2_left_right, party = df$w2_party_vote_intention),
                 tibble(lr = df$w5_left_right, party = df$w5_vote_choice),
                 tibble(lr = df$w6_left_right, party = df$w6_vote_choice),
                 tibble(lr = df$w7_left_right, party = df$w7_vote_choice)) %>%
  group_by(party) %>%
  summarize(lr = mean(lr, na.rm=T)) %>%
  arrange(lr) %>%
  filter(!is.na(party)) %>%
  filter(!party %in% (c("Other party", "Undecided", "I don't intend to vote", "Blank vote", "Other/Don't Know")))

pmean <- mean(pdf$lr)
pdf <- pdf %>% mutate(bloc = ifelse(lr > pmean, "Right", "Left"))

tdf <- df %>%  select(matches("w[1234567]_thermometer|w[1234567]_sd_.*Wingers|userId"))  %>%
  pivot_longer(cols = contains("thermometer"), names_to = c("wave", "party"), 
               names_pattern = "w(\\d+)_thermometer_(.*)")



tdf$sd_left_wingers <- tdf %>% select(contains("Left-Wingers")) %>% apply(1, mean, na.rm=T)
tdf$sd_right_wingers <- tdf %>% select(contains("Right-Wingers")) %>% apply(1, mean, na.rm=T)

tdf <- tdf %>% left_join(pdf %>% select(party, bloc))


otdf <- tdf %>% group_by(userId, bloc) %>%
  summarize(th = mean(value, na.rm=T), sd_right_wingers = mean(sd_right_wingers),
            sd_left_wingers = mean(sd_left_wingers)) %>%
  filter(!is.na(bloc))

# Exclude middle parties
otdf_nomiddle <- tdf %>% filter(party != "Haavoda-Gesher", party != "Kahol-Lavan", party != "Gesher", party != "Kulanu") %>%
  group_by(userId, bloc) %>%
  summarize(th = mean(value, na.rm=T), sd_right_wingers = mean(sd_right_wingers),
            sd_left_wingers = mean(sd_left_wingers)) %>%
  filter(!is.na(bloc))


mod_left <- lm(data = otdf %>% filter(bloc == "Left"), th ~ sd_left_wingers)
mod_right <- lm(data = otdf %>% filter(bloc == "Right"), th ~ sd_right_wingers)
mod_right_nomiddle <- lm(data = otdf_nomiddle %>% filter(bloc == "Right"), th ~ sd_right_wingers)
mod_left_nomiddle <- lm(data = otdf_nomiddle %>% filter(bloc == "Left"), th ~ sd_left_wingers)


dl <- get_model_data(mod_left, type="pred")$sd_left_wingers %>% as.data.frame() %>% mutate(subset = "Left Wing")
dlm <- get_model_data(mod_left_nomiddle, type="pred")$sd_left_wingers %>% as.data.frame() %>% mutate(subset = "Left Wing, Reduced")
dr <- get_model_data(mod_right, type="pred")$sd_right_wingers %>% as.data.frame() %>% mutate(subset = "Right Wing")
drm <- get_model_data(mod_right_nomiddle, type="pred")$sd_right_wingers %>% as.data.frame() %>% mutate(subset = "Right Wing, Reduced")

ggplot(bind_rows(dl, dr), aes(x, predicted)) + geom_line(aes(color = subset)) +
  geom_ribbon(aes(ymin = conf.low, ymax = conf.high, fill = subset), alpha=0.1) +
  #ggtitle("Predicted thermometer score by social distance from bloc's voters") +
  ylab("Predicted thermometer score") + xlab("Social distance value") + theme_light() + 
  theme(text = element_text(size=20)) +
  labs(fill = "Subset", color = "Subset")

ggplot(bind_rows(dl, dlm, dr, drm), aes(x, predicted)) + geom_line(aes(color = subset)) +
  geom_ribbon(aes(ymin = conf.low, ymax = conf.high, fill = subset), alpha=0.1) +
  #ggtitle("Predicted thermometer score by social distance from bloc's voters") +
  ylab("Predicted thermometer score") + xlab("Social distance value") + theme_light() + 
  theme(text = element_text(size=20)) +
  labs(fill = "Subset", color = "Subset")

### SD graph

# generate long version of IPP

df_long <- pivot_longer(df, cols = matches("^w\\d+_(thermometer|sd|dg)"), names_to = c("wave", "var", "party"),
                        names_pattern = "w(\\d+)_(.*)_p?(.*)")


df_sd <- df_long %>% filter(var == "sd")
df_th <- df_long %>% filter(var == "thermometer")
df_dg <- df_long %>% filter(var == "dg")

df_th %<>% rename(th = value) %>% select(-var)
df_sd %<>% rename(sd = value) %>% select(-var)
df_dg %<>% rename(dg = value) %>% select(-var)

df_dyadic <- left_join(df_th, df_sd)
df_dyadic <- left_join(df_dyadic, df_dg)

df_dyadic %<>% filter(!is.na(th) | !is.na(sd) | !is.na(dg))

df_dyadic <- df_dyadic %>% relocate(userId, party, wave, th, sd, dg)


totalmean <- df_dyadic %>% filter(wave < 8) %>% group_by(userId, party) %>%
  summarize(sd = sd(th, na.rm=T)) %>% group_by(userId) %>%
  summarize(meansd = mean(sd, na.rm=T)) %>%
  summarize(meansd = mean(meansd, na.rm=T))

# generate Figure 1 

df_dyadic %>% filter(wave < 8) %>% group_by(userId, party) %>%
  summarize(sd = sd(th, na.rm=T)) %>% group_by(userId) %>%
  summarize(meansd = mean(sd, na.rm=T)) %>%
  ggplot(aes(meansd)) + geom_histogram() + theme_light() + xlab("Standard Deviation") + ylab("Count") +
  #ggtitle("Mean individual standard deviation in thermometer values") +
  geom_vline(xintercept = pull(totalmean)) + theme(text = element_text(size=20)) +
  scale_x_continuous(breaks = 0:6)

# figure A.1 in SI
sd_by_party <- df_dyadic %>% group_by(userId, party) %>%
  summarize(sd = sd(th, na.rm=T)) %>%
  group_by(party) %>% summarize(msd = mean(sd, na.rm=T))


df_dyadic %>% filter(wave < 8) %>% group_by(userId, party) %>%
  summarize(sd = sd(th, na.rm=T)) %>%
  filter(!is.na(sd)) %>% filter(!party %in% c("Haavoda-Gesher", "Hamahane-Hademokrati", "Hacalcalit-Hahadasha", "Hazionut-Hadatit",
                                              "Raam", "Tikva-Hadasha", "Yesh-Atid")) %>%
  ggplot(aes(sd)) + geom_histogram() + facet_wrap(~party) +
  geom_vline(data = sd_by_party %>% filter(!party %in% c("Haavoda-Gesher", "Hamahane-Hademokrati", "Hacalcalit-Hahadasha", "Hazionut-Hadatit",
                                                         "Raam", "Tikva-Hadasha", "Yesh-Atid")), aes(xintercept = msd)) +
  theme_light() + xlab("Standard Deviation") + ylab("Count") + theme(text = element_text(size=20))
